1 Materials

1.1 Data

options(timeout = 10000)
dir.create("data/steinbock/raw", recursive = TRUE)
## Warning in dir.create("data/steinbock/raw", recursive = TRUE): 'data/steinbock/
## raw' already exists
download.file("https://zenodo.org/record/6642699/files/panel.csv",
              "data/steinbock/panel.csv")
download.file("https://zenodo.org/record/5949116/files/Patient1.zip",
              "data/steinbock/raw/Patient1.zip")
download.file("https://zenodo.org/record/5949116/files/Patient2.zip",
              "data/steinbock/raw/Patient2.zip")
download.file("https://zenodo.org/record/5949116/files/Patient3.zip",
              "data/steinbock/raw/Patient3.zip")
download.file("https://zenodo.org/record/5949116/files/Patient4.zip",
              "data/steinbock/raw/Patient4.zip")
download.file("https://zenodo.org/record/5949116/files/compensation.zip",
              "data/compensation.zip")
unzip("data/compensation.zip", exdir="data", overwrite=TRUE)
unlink("data/compensation.zip")
download.file("https://zenodo.org/record/5949116/files/sample_metadata.xlsx", 
         destfile = "data/sample_metadata.xlsx")
download.file("https://zenodo.org/record/7079294/files/gated_cells.zip",
              "data/gated_cells.zip")
unzip("data/gated_cells.zip", exdir="data", overwrite=TRUE)
unlink("data/gated_cells.zip")

2 Equipment setup

2.1 Installation instructions

if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools")

devtools::install_github(c("BodenmillerGroup/imcRtools", 
                           "BodenmillerGroup/cytomapper"))


if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")

BiocManager::install(c("pheatmap", "viridis",
                       "tiff", "distill", "openxlsx", "ggrepel", "patchwork",
                       "mclust", "RColorBrewer", "uwot", "Rtsne", "caret",                                               
                       "randomForest", "ggridges", "gridGraphics", "scales", 
                       "CATALYST", "scuttle", "scater", "dittoSeq", 
                       "tidyverse", "batchelor", "bluster","scran"))

3 Procedure

3.1 Preprocessing

steinbock preprocess imc images --hpf 50
## bash: no job control in this shell
## 2022-09-27 14:07:32,574 INFO steinbock - img/Patient4_005.tiff
## 2022-09-27 14:07:35,642 INFO steinbock - img/Patient4_006.tiff
## 2022-09-27 14:07:37,683 INFO steinbock - img/Patient4_007.tiff
## 2022-09-27 14:07:40,122 INFO steinbock - img/Patient4_008.tiff
## 2022-09-27 14:07:42,320 INFO steinbock - img/Patient3_001.tiff
## 2022-09-27 14:07:44,818 INFO steinbock - img/Patient3_002.tiff
## 2022-09-27 14:07:51,711 INFO steinbock - img/Patient3_003.tiff
## 2022-09-27 14:08:04,518 INFO steinbock - img/Patient2_001.tiff
## 2022-09-27 14:08:06,077 INFO steinbock - img/Patient2_002.tiff
## 2022-09-27 14:08:09,393 INFO steinbock - img/Patient2_003.tiff
## 2022-09-27 14:08:12,661 INFO steinbock - img/Patient2_004.tiff
## 2022-09-27 14:08:15,646 INFO steinbock - img/Patient1_001.tiff
## 2022-09-27 14:08:19,383 INFO steinbock - img/Patient1_002.tiff
## 2022-09-27 14:08:21,434 INFO steinbock - img/Patient1_003.tiff
## 2022-09-27 14:08:35,616 INFO steinbock - images.csv

The step took 1.96 minutes.

steinbock segment deepcell --minmax
## bash: no job control in this shell
## /opt/venv/lib/python3.8/site-packages/deepcell_toolbox/deep_watershed.py:179: FutureWarning: `selem` is a deprecated argument name for `h_maxima`. It will be removed in version 1.0. Please use `footprint` instead.
##   markers = h_maxima(image=maxima,
## 2022-09-27 14:09:31,316 INFO steinbock - masks/Patient1_001.tiff
## 2022-09-27 14:09:50,100 INFO steinbock - masks/Patient1_002.tiff
## 2022-09-27 14:10:16,780 INFO steinbock - masks/Patient1_003.tiff
## 2022-09-27 14:10:35,263 INFO steinbock - masks/Patient2_001.tiff
## 2022-09-27 14:10:58,705 INFO steinbock - masks/Patient2_002.tiff
## 2022-09-27 14:11:26,376 INFO steinbock - masks/Patient2_003.tiff
## 2022-09-27 14:11:52,986 INFO steinbock - masks/Patient2_004.tiff
## 2022-09-27 14:12:23,620 INFO steinbock - masks/Patient3_001.tiff
## 2022-09-27 14:12:50,053 INFO steinbock - masks/Patient3_002.tiff
## 2022-09-27 14:13:19,963 INFO steinbock - masks/Patient3_003.tiff
## 2022-09-27 14:13:45,334 INFO steinbock - masks/Patient4_005.tiff
## 2022-09-27 14:14:10,888 INFO steinbock - masks/Patient4_006.tiff
## 2022-09-27 14:14:33,406 INFO steinbock - masks/Patient4_007.tiff
## 2022-09-27 14:14:57,807 INFO steinbock - masks/Patient4_008.tiff

The step took 6.48 minutes.

steinbock measure intensities
## bash: no job control in this shell
## 2022-09-27 14:15:13,624 INFO steinbock - intensities/Patient1_001.csv
## 2022-09-27 14:15:15,273 INFO steinbock - intensities/Patient1_002.csv
## 2022-09-27 14:15:18,603 INFO steinbock - intensities/Patient1_003.csv
## 2022-09-27 14:15:19,997 INFO steinbock - intensities/Patient2_001.csv
## 2022-09-27 14:15:21,412 INFO steinbock - intensities/Patient2_002.csv
## 2022-09-27 14:15:22,845 INFO steinbock - intensities/Patient2_003.csv
## 2022-09-27 14:15:26,029 INFO steinbock - intensities/Patient2_004.csv
## 2022-09-27 14:15:28,630 INFO steinbock - intensities/Patient3_001.csv
## 2022-09-27 14:15:30,687 INFO steinbock - intensities/Patient3_002.csv
## 2022-09-27 14:15:32,342 INFO steinbock - intensities/Patient3_003.csv
## 2022-09-27 14:15:33,638 INFO steinbock - intensities/Patient4_005.csv
## 2022-09-27 14:15:35,263 INFO steinbock - intensities/Patient4_006.csv
## 2022-09-27 14:15:37,000 INFO steinbock - intensities/Patient4_007.csv
## 2022-09-27 14:15:38,335 INFO steinbock - intensities/Patient4_008.csv

The step took 0.54 minutes.

steinbock measure regionprops
## bash: no job control in this shell
## 2022-09-27 14:15:44,211 INFO steinbock - regionprops/Patient1_001.csv
## 2022-09-27 14:15:46,308 INFO steinbock - regionprops/Patient1_002.csv
## 2022-09-27 14:15:48,631 INFO steinbock - regionprops/Patient1_003.csv
## 2022-09-27 14:15:50,725 INFO steinbock - regionprops/Patient2_001.csv
## 2022-09-27 14:15:52,862 INFO steinbock - regionprops/Patient2_002.csv
## 2022-09-27 14:15:54,442 INFO steinbock - regionprops/Patient2_003.csv
## 2022-09-27 14:15:58,003 INFO steinbock - regionprops/Patient2_004.csv
## 2022-09-27 14:16:00,138 INFO steinbock - regionprops/Patient3_001.csv
## 2022-09-27 14:16:02,029 INFO steinbock - regionprops/Patient3_002.csv
## 2022-09-27 14:16:03,929 INFO steinbock - regionprops/Patient3_003.csv
## 2022-09-27 14:16:05,437 INFO steinbock - regionprops/Patient4_005.csv
## 2022-09-27 14:16:07,950 INFO steinbock - regionprops/Patient4_006.csv
## 2022-09-27 14:16:09,749 INFO steinbock - regionprops/Patient4_007.csv
## 2022-09-27 14:16:11,344 INFO steinbock - regionprops/Patient4_008.csv

The step took 0.55 minutes.

steinbock measure neighbors --type expansion --dmax 4
## bash: no job control in this shell
## 2022-09-27 14:16:18,146 INFO steinbock - neighbors/Patient1_001.csv
## 2022-09-27 14:16:22,073 INFO steinbock - neighbors/Patient1_002.csv
## 2022-09-27 14:16:26,821 INFO steinbock - neighbors/Patient1_003.csv
## 2022-09-27 14:16:30,658 INFO steinbock - neighbors/Patient2_001.csv
## 2022-09-27 14:16:34,024 INFO steinbock - neighbors/Patient2_002.csv
## 2022-09-27 14:16:37,350 INFO steinbock - neighbors/Patient2_003.csv
## 2022-09-27 14:16:42,074 INFO steinbock - neighbors/Patient2_004.csv
## 2022-09-27 14:16:47,165 INFO steinbock - neighbors/Patient3_001.csv
## 2022-09-27 14:16:51,557 INFO steinbock - neighbors/Patient3_002.csv
## 2022-09-27 14:16:56,411 INFO steinbock - neighbors/Patient3_003.csv
## 2022-09-27 14:16:59,925 INFO steinbock - neighbors/Patient4_005.csv
## 2022-09-27 14:17:05,593 INFO steinbock - neighbors/Patient4_006.csv
## 2022-09-27 14:17:10,029 INFO steinbock - neighbors/Patient4_007.csv
## 2022-09-27 14:17:13,830 INFO steinbock - neighbors/Patient4_008.csv

The step took 1.04 minutes.

3.2 Reading in data

library(imcRtools)
spe <- read_steinbock("data/steinbock/")

The step took 0.92 minutes.

library(openxlsx)
library(tidyverse)
colnames(spe) <- paste0(spe$sample_id, "_", spe$ObjectNumber)

meta <- read.xlsx("data/sample_metadata.xlsx")
spe$patient_id <- as.vector(str_extract_all(spe$sample_id, "Patient[1-4]", 
   simplify = TRUE))
spe$ROI <- as.vector(str_extract_all(spe$sample_id, "00[1-8]",  
   simplify = TRUE))
spe$indication <- meta$Indication[match(spe$patient_id, meta$Sample.ID)]

rowData(spe)$use_channel <- !grepl("DNA|Histone", rownames(spe))

assay(spe, "exprs") <- asinh(counts(spe)/1)

The step took 0.03 minutes.

library(cytomapper)
images <- loadImages("data/steinbock/img/")
channelNames(images) <- rownames(spe)

The step took 0.27 minutes.

masks <- loadImages("data/steinbock/masks/", as.is = TRUE)
## All files in the provided location will be read in.

The step took 0 minutes.

patient_id <- str_extract_all(names(images), "Patient[1-4]", simplify = TRUE)
indication <- meta$Indication[match(patient_id, meta$Sample.ID)] 

mcols(images) <- mcols(masks) <- DataFrame(sample_id = names(images),
                                           patient_id = patient_id,
                                           indication = indication)

The step took 0 minutes.

3.3 Spillover correction

sce <- readSCEfromTXT("data/compensation/")
## Spotted channels:  Y89, In113, In115, Pr141, Nd142, Nd143, Nd144, Nd145, Nd146, Sm147, Nd148, Sm149, Nd150, Eu151, Sm152, Eu153, Sm154, Gd155, Gd156, Gd158, Tb159, Gd160, Dy161, Dy162, Dy163, Dy164, Ho165, Er166, Er167, Er168, Tm169, Er170, Yb171, Yb172, Yb173, Yb174, Lu175, Yb176
## Acquired channels:  Ar80, Y89, In113, In115, Xe131, Xe134, Ba136, La138, Pr141, Nd142, Nd143, Nd144, Nd145, Nd146, Sm147, Nd148, Sm149, Nd150, Eu151, Sm152, Eu153, Sm154, Gd155, Gd156, Gd158, Tb159, Gd160, Dy161, Dy162, Dy163, Dy164, Ho165, Er166, Er167, Er168, Tm169, Er170, Yb171, Yb172, Yb173, Yb174, Lu175, Yb176, Ir191, Ir193, Pt196, Pb206
## Channels spotted but not acquired:  
## Channels acquired but not spotted:  Ar80, Xe131, Xe134, Ba136, La138, Ir191, Ir193, Pt196, Pb206
assay(sce, "exprs") <- asinh(counts(sce)/5)

The step took 0.09 minutes.

plotSpotHeatmap(sce)

sce2 <- binAcrossPixels(sce, bin_size = 10)

The step took 0.37 minutes.

library(CATALYST)

bc_key <- as.numeric(unique(sce$sample_mass))
bc_key <- bc_key[order(bc_key)]

sce <- assignPrelim(sce, bc_key = bc_key)
## Debarcoding data...
##  o ordering
##  o classifying events
## Normalizing...
## Computing deltas...
sce <- estCutoffs(sce)
sce <- applyCutoffs(sce)

sce <- filterPixels(sce, minevents = 40, correct_pixels = TRUE)

The step took 0.81 minutes.

sce <- computeSpillmat(sce)
sm <- metadata(sce)$spillover_matrix

The step took 0.07 minutes.

library(dittoSeq)
library(patchwork)
    
rowData(spe)$channel_name <- paste0(rowData(spe)$channel, "Di")

isotope_list <- CATALYST::isotope_list
isotope_list$Ar <- 80

spe <- compCytof(spe, sm, 
                 transform = TRUE, cofactor = 1,
                 isotope_list = isotope_list, 
                 overwrite = FALSE)

before <- dittoScatterPlot(spe, x.var = "Ecad", y.var = "CD303",
                           assay.x = "exprs", assay.y = "exprs") +
    ggtitle("Before compensation")

after <- dittoScatterPlot(spe, x.var = "Ecad", y.var = "CD303",
                          assay.x = "compexprs", assay.y = "compexprs") +
    ggtitle("After compensation")

before + after

assay(spe, "counts") <- assay(spe, "compcounts") 
assay(spe, "exprs") <- assay(spe, "compexprs") 
assay(spe, "compcounts") <- assay(spe, "compexprs") <- NULL
channelNames(images) <- rowData(spe)$channel_name
adapted_sm <- adaptSpillmat(sm, paste0(rowData(spe)$channel, "Di"), 
                            isotope_list = isotope_list)
## Compensation is likely to be inaccurate.
## Spill values for the following interactions
## have not been estimated:
## Ir191Di -> Ir193Di
## Ir193Di -> Ir191Di
images_comp <- compImage(images, adapted_sm)

plotPixels(images[5], colour_by = "Yb173Di", 
           image_title = list(text = "Yb173 (Ecad) - before", 
                       position = "topleft"), 
           legend = NULL, bcg = list(Yb173Di = c(0, 4, 1)))

plotPixels(images[5], colour_by = "Yb174Di", 
           image_title = list(text = "Yb174 (CD303) - before", 
                              position = "topleft"), 
           legend = NULL, bcg = list(Yb174Di = c(0, 4, 1)))

plotPixels(images_comp[5], colour_by = "Yb173Di",
           image_title = list(text = "Yb173 (Ecad) - after", 
                              position = "topleft"), 
           legend = NULL, bcg = list(Yb173Di = c(0, 4, 1)))

plotPixels(images_comp[5], colour_by = "Yb174Di", 
           image_title = list(text = "Yb174 (CD303) - after", 
                              position = "topleft"),
           legend = NULL, bcg = list(Yb174Di = c(0, 4, 1)))

channelNames(images_comp) <- rownames(spe)

The step took 12.53 minutes.

3.4 Quality control

set.seed(20220118)
img_ids <- sample(seq_len(length(images_comp)), 3)

cur_images <- images_comp[img_ids]
cur_images <- normalize(cur_images, separateImages = TRUE)
cur_images <- normalize(cur_images, inputRange = c(0, 0.2))

plotPixels(cur_images,
           mask = masks[img_ids],
           img_id = "sample_id",
           missing_colour = "white",
           colour_by = c("CD163", "CD20", "CD3", "Ecad", "DNA1"),
           colour = list(CD163 = c("black", "yellow"),
                         CD20 = c("black", "red"),
                         CD3 = c("black", "green"),
                         Ecad = c("black", "cyan"),
                         DNA1 = c("black", "blue")),
           image_title = NULL,
           legend = list(colour_by.title.cex = 0.9,
                         colour_by.labels.cex = 0.9))

colData(spe) %>%
    as.data.frame() %>%
    group_by(sample_id) %>%
    ggplot() +
        geom_boxplot(aes(sample_id, area)) +
        theme_minimal(base_size = 15) + 
        theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) +
        ylab("Cell area") + xlab("")

spe <- spe[,spe$area >= 5]
multi_dittoPlot(spe, vars = c("HLADR", "CD3", "Ecad", "PDGFRb"),
               group.by = "patient_id", plots = c("ridgeplot"), 
               assay = "exprs")
## Picking joint bandwidth of 0.163
## Picking joint bandwidth of 0.0992
## Picking joint bandwidth of 0.151
## Picking joint bandwidth of 0.0895

library(scater)
## Loading required package: scuttle
set.seed(220225)
spe <- runUMAP(spe, subset_row = rowData(spe)$use_channel, 
 exprs_values = "exprs") 

dittoDimPlot(spe, var = "patient_id", 
     reduction.use = "UMAP", size = 0.2)  +
    ggtitle("Patient ID on UMAP")

The step took 1.13 minutes.

library(batchelor)
set.seed(220228)
out <- fastMNN(spe, batch = spe$patient_id,
               auto.merge = TRUE,
               subset.row = rowData(spe)$use_channel,
               assay.type = "exprs")
## Warning in check_numbers(k = k, nu = nu, nv = nv, limit = min(dim(x)) - : more
## singular values/vectors requested than available
## Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth =
## TRUE, : You're computing too large a percentage of total singular values, use a
## standard svd instead.
reducedDim(spe, "fastMNN") <- reducedDim(out, "corrected")

spe <- runUMAP(spe, dimred= "fastMNN", name = "UMAP_mnnCorrected")

dittoDimPlot(spe, var = "patient_id", 
                   reduction.use = "UMAP_mnnCorrected", size = 0.2) + 
    ggtitle("Patient ID on UMAP after correction")

The step took 3.49 minutes.

3.5 Cell phenotyping

library(bluster)
library(BiocParallel)

mat <- reducedDim(spe, "fastMNN")

set.seed(220825)
combinations <- clusterSweep(mat, BLUSPARAM=SNNGraphParam(),
                             k=c(10L, 20L), 
                             type = c("rank", "jaccard"), 
                             cluster.fun = "louvain")

sil <- vapply(as.list(combinations$clusters), 
              function(x) mean(approxSilhouette(mat, x)$width), 0)

ggplot(data.frame(method = names(sil),
                  sil = sil)) +
    geom_point(aes(method, sil)) +
    theme_classic(base_size = 15) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    xlab("Cluster parameter combination") +
    ylab("Average silhouette width")

The step took 3.78 minutes.

library(scran)

clusters <- clusterCells(spe[rowData(spe)$use_channel,], 
                         use.dimred = "fastMNN", 
                         BLUSPARAM = SNNGraphParam(k=20, 
                                                cluster.fun = "louvain",
                                                type = "rank",
                                                BPPARAM = bpparam()))
spe$nn_clusters <- clusters

The step took 0.9 minutes.

if (interactive()){
    cytomapperShiny(object = spe, mask = masks, image = images_comp, 
                cell_id = "ObjectNumber", img_id = "sample_id")   
}
library(SingleCellExperiment)
label_files <- list.files("data/gated_cells", 
                          full.names = TRUE, pattern = ".rds$")

spes <- lapply(label_files, readRDS)

concat_spe <- do.call("cbind", spes)
## 'sample_id's are duplicated across 'SpatialExperiment' objects to cbind; appending sample indices.
cur_tab <- unclass(table(colnames(concat_spe), 
                         concat_spe$cytomapper_CellLabel))
cur_labels <- rep("doublets", nrow(cur_tab))
names(cur_labels) <- rownames(cur_tab)

# Single assignments
single_index <- rowSums(cur_tab) == 1
cur_labels[single_index] <- colnames(cur_tab)[
apply(cur_tab[single_index,], 1, which.max)]

# Double assignment within the tumor
double_index <- rowSums(cur_tab) == 2 & cur_tab[,"Tumor"] == 1
no_tumor <- cur_tab[,colnames(cur_tab) != "Tumor"]
cur_labels[double_index] <- colnames(no_tumor)[
apply(no_tumor[double_index,], 1, which.max)]

# Remove doublets
cur_labels <- cur_labels[cur_labels != "doublets"]

# Transfer labels to SPE object
spe_labels <- rep("unlabeled", ncol(spe))
names(spe_labels) <- colnames(spe)
spe_labels[names(cur_labels)] <- cur_labels
spe$cell_labels <- spe_labels
library(caret)

# Split between labeled and unlabeled cells
lab_spe <- spe[,spe$cell_labels != "unlabeled"]
unlab_spe <- spe[,spe$cell_labels == "unlabeled"]

# Randomly split into train and test data
set.seed(220925)
trainIndex <- createDataPartition(factor(lab_spe$cell_labels), p = 0.75)
train_spe <- lab_spe[,trainIndex$Resample1]
test_spe <- lab_spe[,-trainIndex$Resample1]

fitControl <- trainControl(method = "cv",
                           number = 5)

cur_mat <- t(assay(train_spe, "exprs")[rowData(train_spe)$use_channel,])

rffit <- train(x = cur_mat, 
               y = factor(train_spe$cell_labels),
               method = "rf", ntree = 1000,
               tuneLength = 5,
               trControl = fitControl)

The step took 11.51 minutes.

cur_mat <- t(assay(test_spe, "exprs")[rowData(test_spe)$use_channel,])

cur_pred <- predict(rffit, 
                    newdata = cur_mat)

confusionMatrix(data = cur_pred, 
                      reference = factor(test_spe$cell_labels), 
                      mode = "everything")
## Confusion Matrix and Statistics
## 
##              Reference
## Prediction    Bcell BnTcell  CD4  CD8 Myeloid Neutrophil Plasma_cell Stroma
##   Bcell         196       0    0    0       0          0           3      0
##   BnTcell         1     400    0    0       0          0           2      0
##   CD4             0       0  166    0       0          0           6      2
##   CD8             0       0    0  193       0          0           9      1
##   Myeloid         0       0    3    2     423          0           0      0
##   Neutrophil      0       0    0    0       1         31           0      0
##   Plasma_cell     0       0    1    1       0          1         147      0
##   Stroma          0       0    1    1       0          0           0    115
##   Treg            0       0    0    0       0          0           1      0
##   Tumor           4       0    1    3       0          1           0      0
##              Reference
## Prediction    Treg Tumor
##   Bcell          0     0
##   BnTcell        0     0
##   CD4            0     0
##   CD8            0     8
##   Myeloid        1     0
##   Neutrophil     0     2
##   Plasma_cell    0     0
##   Stroma         0     0
##   Treg          92     0
##   Tumor          0  1449
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9829         
##                  95% CI : (0.9778, 0.987)
##     No Information Rate : 0.4465         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.9773         
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: Bcell Class: BnTcell Class: CD4 Class: CD8
## Sensitivity               0.97512         1.0000    0.96512    0.96500
## Specificity               0.99902         0.9990    0.99742    0.99413
## Pos Pred Value            0.98492         0.9926    0.95402    0.91469
## Neg Pred Value            0.99837         1.0000    0.99806    0.99771
## Precision                 0.98492         0.9926    0.95402    0.91469
## Recall                    0.97512         1.0000    0.96512    0.96500
## F1                        0.98000         0.9963    0.95954    0.93917
## Prevalence                0.06151         0.1224    0.05263    0.06120
## Detection Rate            0.05998         0.1224    0.05080    0.05906
## Detection Prevalence      0.06089         0.1233    0.05324    0.06457
## Balanced Accuracy         0.98707         0.9995    0.98127    0.97957
##                      Class: Myeloid Class: Neutrophil Class: Plasma_cell
## Sensitivity                  0.9976          0.939394            0.87500
## Specificity                  0.9979          0.999073            0.99903
## Pos Pred Value               0.9860          0.911765            0.98000
## Neg Pred Value               0.9996          0.999382            0.99326
## Precision                    0.9860          0.911765            0.98000
## Recall                       0.9976          0.939394            0.87500
## F1                           0.9918          0.925373            0.92453
## Prevalence                   0.1297          0.010098            0.05141
## Detection Rate               0.1294          0.009486            0.04498
## Detection Prevalence         0.1313          0.010404            0.04590
## Balanced Accuracy            0.9978          0.969233            0.93702
##                      Class: Stroma Class: Treg Class: Tumor
## Sensitivity                0.97458     0.98925       0.9931
## Specificity                0.99937     0.99969       0.9950
## Pos Pred Value             0.98291     0.98925       0.9938
## Neg Pred Value             0.99905     0.99969       0.9945
## Precision                  0.98291     0.98925       0.9938
## Recall                     0.97458     0.98925       0.9931
## F1                         0.97872     0.98925       0.9935
## Prevalence                 0.03611     0.02846       0.4465
## Detection Rate             0.03519     0.02815       0.4434
## Detection Prevalence       0.03580     0.02846       0.4461
## Balanced Accuracy          0.98697     0.99447       0.9941
cur_mat <- t(assay(unlab_spe, "exprs")[rowData(unlab_spe)$use_channel,])

cell_class <- as.character(predict.train(rffit, 
                                         newdata = cur_mat, 
                                         type = "raw"))
names(cell_class) <- rownames(cur_mat)

cell_prob <- predict.train(rffit, 
                           newdata = cur_mat, 
                           type = "prob")
cell_class[rowMax(as.matrix(cell_prob)) < 0.4] <- "undefined"

cell_labels <- spe$cell_labels
cell_labels[colnames(unlab_spe)] <- cell_class
spe$celltype <- cell_labels
p1 <- dittoDimPlot(spe, var = "celltype", 
             reduction.use = "UMAP_mnnCorrected", size = 0.2,
             do.label = TRUE) +
  ggtitle("Cell types on UMAP, integrated cells")

p2 <- dittoDimPlot(spe, var = "nn_clusters", 
             reduction.use = "UMAP_mnnCorrected", size = 0.2,
             do.label = TRUE) +
  ggtitle("Clusters on UMAP, integrated cells")

p1 + p2

library(scuttle)
library(viridis)
## Loading required package: viridisLite
celltype_mean <- aggregateAcrossCells(as(spe, "SingleCellExperiment"),  
                     ids = spe$celltype, 
                     statistics = "mean",
                     use.assay.type = "exprs",
                     subset_row = rowData(spe)$use_channel)

dittoHeatmap(celltype_mean,
             assay = "exprs", cluster_cols = TRUE, 
             scale = "none",
             heatmap.colors = viridis(100),
             annot.by = c("celltype","ncells"))

cluster_mean <- aggregateAcrossCells(as(spe, "SingleCellExperiment"),  
                     ids = spe$nn_clusters, 
                     statistics = "mean",
                     use.assay.type = "exprs",
                     subset_row = rowData(spe)$use_channel)

dittoHeatmap(cluster_mean,
             assay = "exprs", cluster_cols = TRUE, 
             scale = "none",
             heatmap.colors = viridis(100),
             annot.by = c("nn_clusters","ncells"))

3.6 Spatial analysis

library(igraph)

# Spatial community detection - tumor
tumor_spe <- spe[,spe$celltype == "Tumor"]

gr <- graph_from_data_frame(as.data.frame(colPair(tumor_spe, "neighborhood")), 
                            directed = FALSE, 
                            vertices = data.frame(index = seq_len(ncol(tumor_spe))))

cl_comm <- cluster_louvain(gr)
comm_tumor <- paste0("Tumor_", membership(cl_comm))
comm_tumor[membership(cl_comm) %in% which(sizes(cl_comm) < 10)] <- NA
names(comm_tumor) <- colnames(tumor_spe)

# Spatial community detection - non-tumor
stroma_spe <- spe[,spe$celltype != "Tumor"]

gr <- graph_from_data_frame(as.data.frame(colPair(stroma_spe, "neighborhood")), 
                            directed = FALSE, 
                            vertices = data.frame(index = seq_len(ncol(stroma_spe))))

cl_comm <- cluster_louvain(gr)
comm_stroma <- paste0("Stroma_", membership(cl_comm))
comm_stroma[membership(cl_comm) %in% which(sizes(cl_comm) < 10)] <- NA
names(comm_stroma) <- colnames(stroma_spe)

comm <- c(comm_tumor, comm_stroma)

spe$spatial_community <- comm[colnames(spe)]

The step took 0.03 minutes.

plotSpatial(spe[,spe$celltype == "Tumor"], 
            node_color_by = "spatial_community", 
            img_id = "sample_id", 
            node_size_fix = 0.5) +
    theme(legend.position = "none") +
    scale_color_manual(values = rev(colors()))

library(pheatmap)
spe <- buildSpatialGraph(spe, img_id = "sample_id", type = "knn", k = 20)


spe <- aggregateNeighbors(spe, colPairName = "knn_interaction_graph", 
                          aggregate_by = "metadata", count_by = "celltype")

set.seed(220705)

cn_1 <- kmeans(spe$aggregatedNeighbors, centers = 6)
spe$cn_celltypes <- as.factor(cn_1$cluster)

plotSpatial(spe, 
            node_color_by = "cn_celltypes", 
            img_id = "sample_id", 
            node_size_fix = 0.5) +
    scale_color_brewer(palette = "Set3")

mat <- colData(spe) %>% as_tibble() %>%
    group_by(cn_celltypes, celltype) %>%
    summarize(count = n()) %>%
    mutate(freq = count / sum(count)) %>%
    pivot_wider(id_cols = cn_celltypes, names_from = celltype, 
                values_from = freq, values_fill = 0) %>%
    ungroup() %>%
    select(-cn_celltypes)
## `summarise()` has grouped output by 'cn_celltypes'. You can override using the
## `.groups` argument.
pheatmap(mat, 
  color = colorRampPalette(c("dark blue", "white", "dark red"))(100), 
  scale = "column")

The step took 0.23 minutes.

spe <- buildSpatialGraph(spe, img_id = "sample_id", 
                         type = "knn", 
                         name = "knn_spatialcontext_graph", 
                         k = 40)

spe <- aggregateNeighbors(spe, 
                          colPairName = "knn_spatialcontext_graph",
                          aggregate_by = "metadata",
                          count_by = "cn_celltypes",
                          name = "aggregatedNeighborhood")

spe <- detectSpatialContext(spe, 
                            entry = "aggregatedNeighborhood",
                            threshold = 0.90,
                            name = "spatial_context")

spe <- filterSpatialContext(spe, 
                            entry = "spatial_context",
                            group_by = "patient_id", 
                            group_threshold = 3,
                            cells_threshold = 100,
                            name = "spatial_context_filtered")

plotSpatial(spe, 
            node_color_by = "spatial_context_filtered", 
            img_id = "sample_id", 
            node_size_fix = 0.5, 
            colPairName = "knn_spatialcontext_graph")

plotSpatialContext(spe,
                   entry = "spatial_context_filtered",
                   group_by = "sample_id",
                   node_color_by = "n_cells",
                   node_size_by = "n_group",
                   node_label_color_by = "n_cells") +
    scale_color_viridis()

The step took 0.36 minutes.

spe <- patchDetection(spe, 
                      patch_cells = spe$celltype == "Tumor",
                      img_id = "sample_id",
                      expand_by = 1,
                      min_patch_size = 10,
                      colPairName = "neighborhood")

plotSpatial(spe, 
            node_color_by = "patch_id", 
            img_id = "sample_id", 
            node_size_fix = 0.5) +
    theme(legend.position = "none") +
    scale_color_manual(values = rev(colors()))

The step took 0.44 minutes.

library(scales)

set.seed(220825)
out <- testInteractions(spe, 
                        group_by = "sample_id",
                        label = "celltype", 
                        colPairName = "neighborhood")

out %>% as_tibble() %>%
    group_by(from_label, to_label) %>%
    summarize(sum_sigval = sum(sigval, na.rm = TRUE)) %>%
    ggplot() +
        geom_tile(aes(from_label, to_label, fill = sum_sigval)) +
        scale_fill_gradient2(low = muted("blue"), mid = "white", 
  high = muted("red")) +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))

The step took 7.98 minutes.

sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] scales_1.2.1                pheatmap_1.0.12            
##  [3] igraph_1.3.5                viridis_0.6.2              
##  [5] viridisLite_0.4.1           caret_6.0-93               
##  [7] lattice_0.20-45             scran_1.24.1               
##  [9] BiocParallel_1.30.3         bluster_1.6.0              
## [11] batchelor_1.12.3            scater_1.24.0              
## [13] scuttle_1.6.3               patchwork_1.1.2            
## [15] dittoSeq_1.8.1              CATALYST_1.20.1            
## [17] cytomapper_1.9.1            EBImage_4.38.0             
## [19] forcats_0.5.2               stringr_1.4.1              
## [21] dplyr_1.0.10                purrr_0.3.4                
## [23] readr_2.1.2                 tidyr_1.2.1                
## [25] tibble_3.1.8                ggplot2_3.3.6              
## [27] tidyverse_1.3.2             openxlsx_4.2.5             
## [29] imcRtools_1.3.7             SpatialExperiment_1.6.1    
## [31] SingleCellExperiment_1.18.0 SummarizedExperiment_1.26.1
## [33] Biobase_2.56.0              GenomicRanges_1.48.0       
## [35] GenomeInfoDb_1.32.4         IRanges_2.30.1             
## [37] S4Vectors_0.34.0            BiocGenerics_0.42.0        
## [39] MatrixGenerics_1.8.1        matrixStats_0.62.0         
## [41] BiocStyle_2.24.0           
## 
## loaded via a namespace (and not attached):
##   [1] rsvd_1.0.5                  svglite_2.1.0              
##   [3] class_7.3-20                fftwtools_0.9-11           
##   [5] V8_4.2.1                    foreach_1.5.2              
##   [7] crayon_1.5.1                MASS_7.3-58.1              
##   [9] rhdf5filters_1.8.0          nlme_3.1-159               
##  [11] backports_1.4.1             reprex_2.0.2               
##  [13] rlang_1.0.6                 XVector_0.36.0             
##  [15] readxl_1.4.1                irlba_2.3.5                
##  [17] limma_3.52.3                rjson_0.2.21               
##  [19] bit64_4.0.5                 glue_1.6.2                 
##  [21] parallel_4.2.1              vipor_0.4.5                
##  [23] classInt_0.4-7              shinydashboard_0.7.2       
##  [25] haven_2.5.1                 tidyselect_1.1.2           
##  [27] distances_0.1.8             XML_3.99-0.10              
##  [29] zoo_1.8-11                  sf_1.0-8                   
##  [31] ggpubr_0.4.0                nnls_1.4                   
##  [33] xtable_1.8-4                magrittr_2.0.3             
##  [35] evaluate_0.16               cli_3.4.1                  
##  [37] zlibbioc_1.42.0             rstudioapi_0.14            
##  [39] sp_1.5-0                    rpart_4.1.16               
##  [41] bslib_0.4.0                 shiny_1.7.2                
##  [43] BiocSingular_1.12.0         xfun_0.33                  
##  [45] clue_0.3-61                 cluster_2.1.4              
##  [47] tidygraph_1.2.2             ggrepel_0.9.1              
##  [49] listenv_0.8.0               future_1.28.0              
##  [51] png_0.1-7                   ipred_0.9-13               
##  [53] withr_2.5.0                 bitops_1.0-7               
##  [55] aws.signature_0.6.0         ggforce_0.3.4              
##  [57] RBGL_1.72.0                 plyr_1.8.7                 
##  [59] cellranger_1.1.0            ncdfFlow_2.42.1            
##  [61] RTriangle_1.6-0.10          hardhat_1.2.0              
##  [63] pROC_1.18.0                 e1071_1.7-11               
##  [65] dqrng_0.3.0                 pillar_1.8.1               
##  [67] RcppParallel_5.1.5          GlobalOptions_0.1.2        
##  [69] cachem_1.0.6                multcomp_1.4-20            
##  [71] fs_1.5.2                    CytoML_2.8.1               
##  [73] raster_3.6-3                GetoptLong_1.0.5           
##  [75] DelayedMatrixStats_1.18.0   vctrs_0.4.1                
##  [77] ellipsis_0.3.2              generics_0.1.3             
##  [79] lava_1.6.10                 tools_4.2.1                
##  [81] beeswarm_0.4.0              munsell_0.5.0              
##  [83] tweenr_2.0.2                aws.s3_0.3.21              
##  [85] proxy_0.4-27                DelayedArray_0.22.0        
##  [87] fastmap_1.1.0               compiler_4.2.1             
##  [89] abind_1.4-5                 httpuv_1.6.6               
##  [91] prodlim_2019.11.13          GenomeInfoDbData_1.2.8     
##  [93] gridExtra_2.3               edgeR_3.38.4               
##  [95] ggnewscale_0.4.7            ggpointdensity_0.1.0       
##  [97] deldir_1.0-6                utf8_1.2.2                 
##  [99] later_1.3.0                 recipes_1.0.1              
## [101] jsonlite_1.8.0              concaveman_1.1.0           
## [103] graph_1.74.0                ScaledMatrix_1.4.1         
## [105] carData_3.0-5               sparseMatrixStats_1.8.0    
## [107] promises_1.2.0.1            car_3.1-0                  
## [109] doParallel_1.0.17           latticeExtra_0.6-30        
## [111] R.utils_2.12.0              rmarkdown_2.16             
## [113] sandwich_3.0-2              cowplot_1.1.1              
## [115] textshaping_0.3.6           statmod_1.4.37             
## [117] Rtsne_0.16                  uwot_0.1.14                
## [119] HDF5Array_1.24.2            survival_3.4-0             
## [121] ResidualMatrix_1.6.1        yaml_2.3.5                 
## [123] plotrix_3.8-2               systemfonts_1.0.4          
## [125] cytolib_2.8.0               flowWorkspace_4.8.0        
## [127] htmltools_0.5.3             locfit_1.5-9.6             
## [129] graphlayouts_0.8.1          digest_0.6.29              
## [131] assertthat_0.2.1            mime_0.12                  
## [133] tiff_0.1-11                 units_0.8-0                
## [135] future.apply_1.9.1          data.table_1.14.2          
## [137] R.oo_1.25.0                 flowCore_2.8.0             
## [139] drc_3.0-1                   ragg_1.2.2                 
## [141] splines_4.2.1               labeling_0.4.2             
## [143] Rhdf5lib_1.18.2             googledrive_2.0.0          
## [145] RCurl_1.98-1.8              broom_1.0.1                
## [147] hms_1.1.2                   modelr_0.1.9               
## [149] rhdf5_2.40.0                colorspace_2.0-3           
## [151] DropletUtils_1.16.0         ConsensusClusterPlus_1.60.0
## [153] base64enc_0.1-3             BiocManager_1.30.18        
## [155] ggbeeswarm_0.6.0            shape_1.4.6                
## [157] nnet_7.3-17                 sass_0.4.2                 
## [159] Rcpp_1.0.9                  bookdown_0.29              
## [161] mvtnorm_1.1-3               circlize_0.4.15            
## [163] FlowSOM_2.4.0               RProtoBufLib_2.8.0         
## [165] fansi_1.0.3                 tzdb_0.3.0                 
## [167] ModelMetrics_1.2.2.2        parallelly_1.32.1          
## [169] R6_2.5.1                    grid_4.2.1                 
## [171] ggridges_0.5.4              lifecycle_1.0.2            
## [173] zip_2.2.1                   curl_4.3.2                 
## [175] ggsignif_0.6.3              googlesheets4_1.0.1        
## [177] jquerylib_0.1.4             Matrix_1.5-1               
## [179] RcppAnnoy_0.0.19            TH.data_1.1-1              
## [181] RColorBrewer_1.1-3          iterators_1.0.14           
## [183] gower_1.0.0                 svgPanZoom_0.3.4           
## [185] htmlwidgets_1.5.4           beachmat_2.12.0            
## [187] polyclip_1.10-0             terra_1.6-17               
## [189] rvest_1.0.3                 ComplexHeatmap_2.12.1      
## [191] globals_0.16.1              codetools_0.2-18           
## [193] lubridate_1.8.0             randomForest_4.7-1.1       
## [195] metapod_1.4.0               gtools_3.9.3               
## [197] dbplyr_2.2.1                R.methodsS3_1.8.2          
## [199] gtable_0.3.1                DBI_1.1.3                  
## [201] httr_1.4.4                  highr_0.9                  
## [203] KernSmooth_2.23-20          stringi_1.7.8              
## [205] vroom_1.5.7                 reshape2_1.4.4             
## [207] farver_2.1.1                hexbin_1.28.2              
## [209] Rgraphviz_2.40.0            timeDate_4021.104          
## [211] magick_2.7.3                DT_0.25                    
## [213] xml2_1.3.3                  colorRamps_2.3.1           
## [215] ggcyto_1.24.1               BiocNeighbors_1.14.0       
## [217] interp_1.1-3                scattermore_0.8            
## [219] bit_4.0.4                   jpeg_0.1-9                 
## [221] ggraph_2.0.6                pkgconfig_2.0.3            
## [223] gargle_1.2.1                rstatix_0.7.0              
## [225] knitr_1.40